##### ####################################################
#####                                               ######
#####             Participation Analysis            
#####                                               ######
##### ####################################################

# init ------------------------------------------------------------

rm(list=ls())
set.seed(221186)

# Load libraries

library(data.table) # 1.11.4
library(mgcv) # 1.8-24
library(stargazer) # 5.2.2
library(DataCombine) # 0.2.21 for slide function
library(scales) # 1.0.0

# Some additional functions
source("utils.R")

# Load data

load("data/speeches.Rdata")

sink("latex/tables/usefulNumbers/total_debates_in_data.tex")
cat(length(unique(speeches$subsection_id)))
sink()

# Options

nBootstrapReps <- 1000

### ################################################
### Prepare data
### ################################################

out <- speeches[minister_in_debate==F & is_speaker == F ,
                list(prop.women.words = sum(word_count[Gender == "F"], na.rm = T)/sum(word_count),
                     prop.women.speeches = length(body[Gender == "F"])/length(body),
                     ratio.women.words = (sum(word_count[Gender == "F"],na.rm=T)/sum(word_count))/unique(prop_women),
                     ratio.women.speeches = (length(body[Gender == "F"])/length(body))/unique(prop_women),
                     minister.gender = unique(minister_gender),
                     opp.minister.gender = unique(opp_minister_gender),
                     speaker.gender = unique(speaker_gender),
                     minister.name = unique(minister_name),
                     ministry = unique(debate_department),
                     opp.ministry = unique(opp_debate_department),
                     yearmon = unique(yearmon),
                     model_weights = .N), 
                by = list(subsection_id, debate_department)]

out$ministry <- as.factor(out$ministry)
out$opp.ministry <- as.factor(out$opp.ministry)

out$yearmon.numeric <- as.numeric(out$yearmon)-1997

### ################################################
### Models
### ################################################

# Bespoke functions to estimate models, output latex tables, and calculate effect sizes

effect.sizes <- function(mod.list, title = "Proportion of words", type = "Bootstrap", baseline){
  gam.mod <- mod.list[[7]]
  mod.list <- mod.list[1:6]
  effects.se <- data.frame(mod = 1:6, do.call("rbind",lapply(mod.list, function(x) effect.func(x, baseline = baseline, se = "se"))), se = "Regular")
  effects.clus <- data.frame(mod = 1:6, do.call("rbind",lapply(mod.list, function(x) effect.func(x, baseline = baseline, se = "clustered.se"))), se = "Clustered")
  effects.boot <- data.frame(mod = 1:6, do.call("rbind",lapply(mod.list, function(x) effect.func(x, baseline = baseline, se = "boot.se"))), se = "Bootstrap")
  effects.gam <- data.frame(mod = 7, t(effect.func.gam(gam.mod, baseline = baseline)), se = "Regular")
  
  effects.all <- rbind(
    effects.se,
    effects.clus,
    effects.boot,
    effects.gam)
  
  effects.all$title <- title
  
  effects <- effects.all[effects.all$se==type | effects.all$mod==7, c("effect", "upper", "lower")]
  
  
  if(type != "all") return(effects)
  if(type == "all") return(effects.all)
  
}

main_reg_func <- function(dv = "prop.women.words", my_data = out, nBootstrapReps = 10, file_out = "latex/tables/words_prop_boot_tables.tex", ...){
  
  mod.1 <- as.formula(paste0(dv, " ~ minister.gender"))
  boot.mod.1 <- bootstrap.function(mod.1, cluster="ministry", data=my_data, reps=nBootstrapReps)
  
  mod.2 <- as.formula(paste0(dv, " ~ minister.gender + as.factor(yearmon)"))
  boot.mod.2 <- bootstrap.function(mod.2, cluster="ministry", data=my_data, reps=nBootstrapReps)
  
  mod.3 <- as.formula(paste0(dv, " ~ minister.gender + ministry"))
  boot.mod.3 <- bootstrap.function(mod.3, cluster="ministry", data=my_data, reps=nBootstrapReps)
  
  mod.4 <- as.formula(paste0(dv, " ~ minister.gender + ministry + as.factor(yearmon)"))
  boot.mod.4 <- bootstrap.function(mod.4, cluster="ministry", data=my_data, reps=nBootstrapReps)
  
  mod.5 <- as.formula(paste0(dv, " ~ minister.gender + ministry*yearmon.numeric + as.factor(yearmon)"))
  boot.mod.5 <- bootstrap.function(mod.5, cluster="ministry", data=my_data, reps=nBootstrapReps)
  
  mod.6 <- as.formula(paste0(dv, " ~ minister.gender + ministry*poly(yearmon.numeric,2) + as.factor(yearmon)"))
  boot.mod.6 <- bootstrap.function(mod.6, cluster="ministry", data=my_data, reps=nBootstrapReps)
  
  mod.gam <- as.formula(paste0(dv," ~ minister.gender  + s(as.numeric(yearmon),by=ministry) + ministry + as.factor(yearmon)"))
  boot.mod.gam <- gam(mod.gam, data=my_data)
  
  mod.list <- list(boot.mod.1, boot.mod.2, boot.mod.3, boot.mod.4, boot.mod.5, boot.mod.6, boot.mod.gam)
  
  baseline <- my_data[,mean(get(dv)),by=minister.gender]$V1[1]
  
  mod.effects <- effect.sizes(mod.list, type = "Bootstrap", baseline = baseline)
  
  return(list(mod.list, mod.effects))
  
}

stargazer.multi.func <- function(boot.out.list,file_out,...){
 
  boot.mod.1 <- boot.out.list[[1]][[1]]
  boot.mod.2 <- boot.out.list[[1]][[2]]
  boot.mod.3 <- boot.out.list[[1]][[3]]
  boot.mod.4 <- boot.out.list[[1]][[4]]
  boot.mod.5 <- boot.out.list[[1]][[5]]
  boot.mod.6 <- boot.out.list[[1]][[6]]
  boot.mod.gam <- boot.out.list[[1]][[7]]
  
  mod.effects <- boot.out.list[[2]]
  
  
  month.fe <- c("Month FEs","$\\times$","\\checkmark","$\\times$","\\checkmark","\\checkmark","\\checkmark","\\checkmark")
  ministry.fe <- c("Ministry FEs","$\\times$","$\\times$","\\checkmark","\\checkmark","\\checkmark","\\checkmark","\\checkmark")
  linear.tt <- c("Linear time trends","$\\times$","$\\times$","$\\times$","$\\times$","\\checkmark","\\checkmark","$\\times$")
  quadratic.tt <- c("Quadratic time trends","$\\times$","$\\times$","$\\times$","$\\times$","$\\times$","\\checkmark","$\\times$")
  smooths <- c("Flexible time trends","$\\times$", "$\\times$","$\\times$","$\\times$","$\\times$","$\\times$","\\checkmark")
  
  sink(file=file_out)
  mod_stargazer(
    boot.mod.1, boot.mod.2, boot.mod.3, boot.mod.4, boot.mod.5, boot.mod.6, boot.mod.gam,
    intercept.top=T, intercept.bottom=F, keep=c(1,2), order=c(2,1), 
    add.lines = list(month.fe, ministry.fe, linear.tt, quadratic.tt, smooths,
                     c("Effect Size \\%", mod.effects[,1]),
                     c("95\\% CI",paste("[", mod.effects[,3],",", mod.effects[,2],"]",sep=""))), 
    covariate.labels=c("Female minister","Constant"),
    column.sep.width="0.25pt", keep.stat=c("n","rsq","adj.rsq"),
    dep.var.caption="",
    se=list(boot.mod.1$boot.se , boot.mod.2$boot.se , boot.mod.3$boot.se, boot.mod.4$boot.se, boot.mod.5$boot.se, boot.mod.6$boot.se, summary(boot.mod.gam)$se), 
    no.space=T, model.names=F,...)
  sink()
  
}


# Proportion words ------------------------------------------------------------ 

prop.women.words.out <- main_reg_func(dv = "prop.women.words", 
                                      data = out, 
                                      nBootstrapReps = nBootstrapReps)

stargazer.multi.func(prop.women.words.out, 
                                      file_out = "latex/tables/words_prop_boot_tables.tex", 
                                      title="", 
                                      label="main_results_prop_boot",
                     dep.var.labels = "\\emph{PropWordsWomen}")

# Ratio words ------------------------------------------------------------ 

ratio.women.words.out <- main_reg_func(dv = "ratio.women.words", 
                                      data = out, 
                                      nBootstrapReps = nBootstrapReps)

stargazer.multi.func(ratio.women.words.out, 
                     file_out = "latex/tables/words_ratio_boot_tables.tex", 
                     title="", 
                     label="main_results_ratio_boot",
                     dep.var.labels = "\\emph{RatioWordsWomen}")

# Proportion speeches ------------------------------------------------------------ 

prop.women.speeches.out <- main_reg_func(dv = "prop.women.speeches", 
                                       data = out, 
                                       nBootstrapReps = nBootstrapReps)

stargazer.multi.func(prop.women.speeches.out, 
                     file_out = "latex/tables/speeches_prop_boot_tables.tex", 
                     title="", 
                     label="speech_results_prop_boot",
                     dep.var.labels = "\\emph{PropSpeechesWomen}")

# Ratio speeches ------------------------------------------------------------ 

ratio.women.speeches.out <- main_reg_func(dv = "ratio.women.speeches", 
                                         data = out, 
                                         nBootstrapReps = nBootstrapReps)

stargazer.multi.func(ratio.women.speeches.out, 
                     file_out = "latex/tables/speeches_ratio_boot_tables.tex", 
                     title="", 
                     label="speech_results_ratio_boot",
                     dep.var.labels = "\\emph{RatioSpeechesWomen}")


# Leads and lags ------------------------------------------------------------ 

speeches$yrqtr <- as.yearqtr(speeches$hdate)

lag.size <- 6
nreps <- nBootstrapReps

out <- speeches[minister_in_debate==F & is_speaker == F,
        list(
          prop.women.words=sum(word_count[Gender=="F"],na.rm=T)/sum(word_count),
          prop.women.speeches=length(body[Gender=="F"])/length(body),
          ratio.women.words=(sum(word_count[Gender=="F"],na.rm=T)/sum(word_count))/unique(prop_women),
          ratio.women.speeches=(length(body[Gender=="F"])/length(body))/unique(prop_women),
          minister.gender=unique(minister_gender),
          minister.name=unique(minister_name),
          gov=unique(gov),
          Speech.Date=unique(hdate)[1],
          yearmon=unique(yearmon)), by=list(subsection_id, debate_department)]

out$debate_department <- as.factor(out$debate_department)

setkey(out,debate_department,Speech.Date)

out$d.star <- out$minister.gender=="F"
out$change <- 0
out$change[which(diff(out$d.star)==1)+1] <- 1

xx <- data.frame(out)

out.list <- list()
i<-0
for(slide in c(-3:-1,1:3)){
  i <- i+ 1
  out.list[[i]] <- slide(xx,Var="change",GroupVar = "debate_department",TimeVar = "yearmon",slideBy = slide)[,dim(xx)[2]+1]
}
out <- data.table(cbind(xx,do.call("cbind",out.list)))

setnames(out,names(out)[names(out)%in%c(1,2,3,4,5,6)],c("lag3","lag2","lag1","lead1","lead2","lead3"))


## Average tenure of male and female ministers

tenure <- out[,list(tenure=max(Speech.Date)-min(Speech.Date),minister.gender=unique(minister.gender)),by=list(minister.name,debate_department)]
tenure <- tenure[,list(median=median(tenure), mean=mean(tenure)),by=minister.gender]
tenure$mean <- round(as.numeric(tenure$mean/31)) # Define a month as 31 days
tenure$median <- round(as.numeric(tenure$median/31)) # Define a month as 31 days

setkey(out,debate_department,Speech.Date)

one.month <- as.Date("1997-02-01") - as.Date("1997-01-01") # Define length of a month

distances <- c(one.month*seq(0,50,6))

out$d.star <- out$minister.gender=="F"
out$change <- 0
out$change[which(diff(out$d.star)==1)+1] <- 1
episodes <- out[change==1]

nlags <- 3
nleads <- 4

## Define lags
lag <- 2
for(lag in 1:nlags){ # Loop over lags
  
  start.distance <- distances[lag] # Define distance for beginning of lag
  end.distance <- distances[lag+1] # Define distance for end of lag
  out[[paste("lag",lag,sep="")]] <- 0 # Define lag
  
  for(e in 1:dim(episodes)[1]) out[[paste("lag",lag,sep="")]][out$debate_department==episodes[e]$debate_department & out$Speech.Date>=(episodes[e]$Speech.Date+ start.distance) & out$Speech.Date<(episodes[e]$Speech.Date+end.distance)] <- 1 # Lag is equal to one when debate falls within defined distances of date range
  
}

# Clean up lags. (remove instances when lag is equal to 1, when d.star is equal to FALSE)

out[[paste0("lag",nlags+1)]] <- as.numeric(as.vector(unlist(out[,"d.star",with=F] - rowSums(out[,grep("lag",names(out)),with=F],na.rm=T)))) # Define residual lag category

for(lag in 1:(nlags+1)) out[[paste0("lag",lag)]][out[[paste0("lag",nlags+1)]]==-1] <- 0 # Remove instances where the final lag is equal to -1 (these are the cases where some lags are equal to 1, when d.star is equal to FALSE)

## Define leads

for(lead in 1:nleads){ # Loop over lags
  
  start.distance <- distances[lead] # Define distance for beginning of lead
  end.distance <- distances[lead+1] # Define distance for end of lead
  out[[paste("lead",lead,sep="")]] <- 0 # Define lead
  
  for(e in 1:dim(episodes)[1]) out[[paste("lead",lead,sep="")]][out$debate_department==episodes[e]$debate_department & out$Speech.Date<(episodes[e]$Speech.Date- start.distance) & out$Speech.Date>(episodes[e]$Speech.Date-end.distance)] <- 1 # lead is equal to one when debate falls within defined distances of date range
  
}

# Clean up lags. (remove instances when lag is equal to 1, when d.star is equal to TRUE)

for(lead in 1:(nleads)) out[[paste0("lead",lead)]][out$d.star==1] <- 0 # Remove instances where the final lag is equal to -1 (these are the cases where some lags are equal to 1, when d.star is equal to FALSE)

## Run lags and leads model
out$yearmon_numeric <- as.numeric(out$yearmon)

lead_lag_formula <- as.formula(paste("prop.women.words", "~", paste(names(out)[grep("lag",names(out))],collapse=" + "), "+", paste(names(out)[grep("lead",names(out))],collapse=" + "), "+", "as.factor(yearmon) + debate_department"))

mod.out <- lm(lead_lag_formula ,data=out)

## Bootstrap lags and leads model

cat(paste0("Bootstrapping lags and leads model. ", nreps, " bootstrap samples."))
boot.coef.mat <- matrix(NA, nreps, length(coef(mod.out)))
  for(boot in 1:nreps){
cat(".")
    tmp_boot_coefs <- coef(lm(lead_lag_formula , data = out[block.bootstrap(out$debate_department),]))
    boot.coef.mat[boot,] <- tmp_boot_coefs[match(names(coef(mod.out)), names(tmp_boot_coefs))]
    
}

mod.out.boot.se <- apply(boot.coef.mat,2,function(x) sd(x,na.rm = T))

## Assign clustered errors
mod.out.clus.se <- coeftest(mod.out,vcov=vcovCluster(mod.out,out$debate_department))[,2]
mod.out.se <- coef(summary(mod.out))[,2] 


#mod.out.boot.se <- mod.out.clus.se

## Extract coefficients, standard errors, and calculate confidence intervals
lag.coefs <- data.table(coef.name=as.numeric(gsub("[[:alpha:]]","",names(coef(mod.out)[grep("lag",names(coef(mod.out)))])))-1,coef=coef(mod.out)[grep("lag",names(coef(mod.out)))])
lag.se <- mod.out.boot.se[grep("lag",names(coef(mod.out)))]
lag.coefs$upper <- lag.coefs$coef + lag.se*1.96
lag.coefs$lower <- lag.coefs$coef - lag.se*1.96
lead.coefs <- data.table(coef.name=-as.numeric(gsub("[[:alpha:]]","",names(coef(mod.out)[grep("lead",names(coef(mod.out)))]))),coef=coef(mod.out)[grep("lead",names(coef(mod.out)))])
lead.se <- mod.out.boot.se[grep("lead",names(coef(mod.out)))]
lead.coefs$upper <- lead.coefs$coef + lead.se*1.96
lead.coefs$lower <- lead.coefs$coef - lead.se*1.96

combined <- rbind(lag.coefs,lead.coefs)

male.col <- "#E69F00"
female.col <- "#0072B2"
background.col <- "transparent"

## Plot
png(paste0("plots/leads_lags_",lag.size,"_months.png"),900,600)
par(cex=1.8, bg= background.col, lwd=1.4, mar=c(4,4,.5,1)+0.1)
plot(combined$coef.name,combined$coef,xlab="Months relative to appointment of female minister",ylab="Effect of female minister on words spoken by women",col=alpha("black",1),pch=19,ylim=c((min(combined$lower)-0.04),(max(combined$upper)+0.04)),bty="n",xaxt="n")
abline(h=0,v=-0.5,lty=2)
segments(x0=lag.coefs$coef.name,x1=lag.coefs$coef.name,y0=lag.coefs$lower,y1=lag.coefs$upper)
segments(x0=lead.coefs$coef.name,x1=lead.coefs$coef.name,y0=lead.coefs$lower,y1=lead.coefs$upper)
#legend("topleft",legend=c("Lead/Lag coefficient with\n 95% confidence interval"),bty="n",pch=19,lty=1)
labs <- c("18-24", "12-18", "6-12", "0-6", "0-6", "6-12", "12-18","18+")
axis(1,at=seq(min(lead.coefs$coef.name),max(lag.coefs$coef.name)),labels=labs)
#dev.off()
dev.off()

